# -*- coding: utf-8 -*-
import scrapy
from urllib.parse import quote

from mafengwo.items import MafengwoItem


class MfwSpider(scrapy.Spider):
    """Spider that searches mafengwo.cn for a keyword and scrapes result pages.

    The search listing is walked page by page (up to ``max_page``); every
    result link found on a listing page is fetched and handed to
    ``parse_detail``, which emits one ``MafengwoItem`` per detail page that
    has a summary.
    """

    name = 'mfw'
    allowed_domains = ['www.mafengwo.cn']
    # NOTE(review): this prompt runs when the class body is executed, i.e. at
    # import time, so any command that loads the spider module will block on
    # stdin. Kept as-is to preserve existing behavior; consider a spider
    # argument instead.
    keywords = input('请输入城市名：')
    # Search URL template; ``{}`` is filled with the page number.
    # ``quote`` percent-encodes the keyword, so it cannot inject stray
    # ``{``/``}`` into the format string.
    url = 'http://www.mafengwo.cn/search/s.php?q=' + quote(keywords) + '&p={}&t=poi&kt=1'
    # Page number of the most recently scheduled listing request.
    page = 1
    # Last listing page that will be requested (inclusive).
    max_page = 50
    start_urls = [url.format(page)]

    def parse(self, response):
        """Parse one search listing page.

        Yields a request per result link (handled by ``parse_detail``) and,
        while ``self.page`` is below ``max_page``, a request for the next
        listing page handled by this same method.
        """
        links = response.xpath('//div[@class="att-list"]/ul/li//h3/a/@href').extract()
        for link in links:
            # Resolve against the page URL: scrapy.Request raises ValueError
            # for relative or scheme-less hrefs, which would otherwise abort
            # this generator before pagination is scheduled.
            yield scrapy.Request(url=response.urljoin(link), callback=self.parse_detail)

        if self.page < self.max_page:
            self.page += 1
            yield scrapy.Request(url=self.url.format(self.page), callback=self.parse)

    def parse_detail(self, response):
        """Extract a ``MafengwoItem`` from a detail page.

        Yields nothing when the page has no summary text; otherwise yields a
        single item with ``name``, ``intro``, ``addr`` and ``time_cost``.
        Fields other than ``intro`` are ``None`` when their node is missing.
        """
        item = MafengwoItem()
        # Attraction name.
        item['name'] = response.xpath('//div[@class="title"]/h1/text()').extract_first()

        # Attraction introduction; pages without one are skipped entirely.
        summary = response.xpath('//div[@class="summary"]/text()').extract_first()
        if not summary:
            return
        # Drop newlines and spaces, then trim leading/trailing '·' bullets.
        item['intro'] = summary.replace('\n', '').replace(' ', '').strip('·')

        # Attraction address.
        item['addr'] = response.xpath('//div[@class="mhd"]/p/text()').extract_first()
        # Time-cost text shown on the page.
        item['time_cost'] = response.xpath('//li[@class="item-time"]/div[@class="content"]/text()').extract_first()
        yield item


